package ai.koog.prompt.executor.clients.openai.models

import ai.koog.prompt.executor.clients.openai.base.models.OpenAIAudioConfig
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIBaseLLMRequest
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIBaseLLMResponse
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIBaseLLMStreamResponse
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIChoiceLogProbs
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIMessage
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIModalities
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIResponseFormat
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIStaticContent
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIStreamChoice
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIStreamOptions
import ai.koog.prompt.executor.clients.openai.base.models.OpenAITool
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolChoice
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIUsage
import ai.koog.prompt.executor.clients.openai.base.models.OpenAIWebSearchOptions
import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
import ai.koog.prompt.executor.clients.openai.base.models.ServiceTier
import ai.koog.prompt.executor.clients.serialization.AdditionalPropertiesFlatteningSerializer
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.JsonElement
import kotlin.collections.List

/**
 * Chat completion request.
 *
 * see [Chat Completion](https://platform.openai.com/docs/api-reference/chat/create)
 *
 * Note: this class is serialized with [OpenAIChatCompletionRequestSerializer],
 * which flattens [additionalProperties] into the top-level JSON object.
 *
 * @property messages A list of messages comprising the conversation so far.
 * Depending on the [model] you use, different message types ([modalities]) are supported,
 * like [text][Content.Text], [images][OpenAIContentPart.Image] and [audio][OpenAIContentPart.Audio].
 * @property model Model ID used to generate the response, like `gpt-4o` or `o3`.
 * OpenAI offers a wide range of models with different capabilities, performance characteristics and price points.
 * @property audio Parameters for audio output. Required when audio output is requested with `modalities: ["audio"]`.
 * @property frequencyPenalty Number between -2.0 and 2.0.
 * Positive values penalize new tokens based on their existing frequency in the text so far,
 * decreasing the model's likelihood to repeat the same line verbatim.
 * @property logitBias Modify the likelihood of specified tokens appearing in the completion.
 *
 * Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer)
 * to an associated bias value from -100 to 100.
 * Mathematically, the bias is added to the logits generated by the model prior to sampling.
 * The exact effect will vary per model,
 * but values between -1 and 1 should decrease or increase the likelihood of selection;
 * values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
 * @property logprobs Whether to return log probabilities of the output tokens or not.
 * If true, returns the log probabilities of each output token returned in the `content` of `message`.
 * @property maxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
 * including visible output tokens and reasoning tokens.
 * @property maxTokens (`Deprecated`) The maximum number of tokens that can be generated in the chat completion.
 * This value can be used to control costs for text generated via API.
 *
 * This value is now deprecated in favor of [maxCompletionTokens], and is not compatible with `o-series` models.
 * @property metadata Set of 16 key-value pairs that can be attached to an object.
 * This can be useful for storing additional information about the object in a structured format
 * and querying for objects via API or the dashboard.
 *
 * Keys are strings with a maximum length of 64 characters.
 * Values are strings with a maximum length of 512 characters.
 * @property modalities Output types that you would like the model to generate.
 * Most models are capable of generating text, which is the default:
 *
 * `["text"]`
 *
 * The gpt-4o-audio-preview model can also be used to generate audio.
 * To request that this model generate both text and audio responses, you can use:
 *
 * `["text", "audio"]`
 * @property numberOfChoices How many chat completion choices to generate for each input message.
 * Note that you will be charged based on the number of generated tokens across all the choices.
 * Keep `n` as `1` to minimize costs.
 * @property parallelToolCalls Whether to enable parallel function calling during tool use.
 * @property prediction Configuration for a [Predicted Output](https://platform.openai.com/docs/guides/predicted-outputs),
 * which can greatly improve response times when large parts of the model response are known ahead of time.
 * This is most common when you are regenerating a file with only minor changes to most of the content.
 * @property presencePenalty Number between -2.0 and 2.0.
 * Positive values penalize new tokens based on whether they appear in the text so far,
 * increasing the model's likelihood to talk about new topics.
 * @property promptCacheKey Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
 * Replaces the `user` field.
 * @property reasoningEffort Constrains effort on reasoning for reasoning models.
 * Currently supported values are `low`, `medium`, and `high`.
 * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
 * @property responseFormat An object specifying the format that the model must output.
 *
 * Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs,
 * which ensures the model will match your supplied JSON schema.
 *
 * Setting to `{ "type": "json_object" }` enables the older JSON mode,
 * which ensures the message the model generates is valid JSON.
 * Using `json_schema` is preferred for models that support it.
 * @property safetyIdentifier A stable identifier used to help detect users of your application
 * that may be violating OpenAI's usage policies.
 * The IDs should be a string that uniquely identifies each user.
 * We recommend hashing their username or email address to avoid sending us any identifying information.
 * @property seed This feature is in Beta.
 * If specified, our system will make the best effort to sample deterministically,
 * such that repeated requests with the same [seed] and parameters should return the same result.
 * Determinism is not guaranteed,
 * and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
 * @property serviceTier Specifies the processing type used for serving the request.
 *
 * - If set to `auto`, then the request will be processed with the service tier configured in the Project settings.
 * Unless otherwise configured, the Project will use 'default'.
 * - If set to `default`,
 * then the request will be processed with the standard pricing and performance for the selected model.
 * - If set to `flex` or `priority`, then the request will be processed with the corresponding service tier.
 * Contact sales to learn more about Priority processing.
 * - When not set, the default behavior is 'auto'.
 *
 * When the [serviceTier] parameter is set,
 * the response body will include the [serviceTier] value based on the processing mode
 * actually used to serve the request.
 * This response value may be different from the value set in the parameter.
 * @property stop Not supported with latest reasoning models `o3` and `o4-mini`.
 *
 * Up to 4 sequences where the API will stop generating further tokens.
 * The returned text will not contain the stop sequence.
 * @property store Whether to store the output of this chat completion
 * request for use in our model distillation or evals products.
 *
 * Supports text and image inputs.
 * Note: image inputs over 10MB will be dropped.
 * @property stream If set to true,
 * the model response data will be streamed to the client as it is generated using server-sent events.
 * @property streamOptions Options for streaming response. Only set this when you set `stream = true`.
 * @property temperature What sampling temperature to use, between 0 and 2.
 * Higher values like 0.8 will make the output more random,
 * while lower values like 0.2 will make it more focused and deterministic.
 * We generally recommend altering this or [topP] but not both.
 * @property toolChoice Controls which (if any) tool is called by the model.
 * `none` means the model will not call any tool and instead generates a message.
 * `auto` means the model can pick between generating a message or calling one or more tools.
 * `required` means the model must call one or more tools.
 * Specifying a particular tool via
 * `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
 *
 * `none` is the default when no tools are present.
 * `auto` is the default if tools are present.
 * @property tools A list of tools the model may call.
 * Currently, only functions are supported as a tool.
 * Use this to provide a list of functions the model may generate JSON inputs for.
 * A max of 128 functions is supported.
 * @property topLogprobs An integer between 0 and 20 specifying the number of most likely tokens
 * to return at each token position, each with an associated log probability.
 * [logprobs] must be set to `true` if this parameter is used.
 * @property topP An alternative to sampling with temperature, called nucleus sampling,
 * where the model considers the results of the tokens with top_p probability mass.
 * So 0.1 means only the tokens comprising the top 10% probability mass are considered.
 *
 * We generally recommend altering this or [temperature] but not both.
 * @property user (`Deprecated`) This field is being replaced by [safetyIdentifier] and [promptCacheKey].
 * Use [promptCacheKey] instead to maintain caching optimizations.
 * A stable identifier for your end-users.
 * Used to boost cache hit rates by better bucketing similar requests and to help OpenAI detect and prevent abuse.
 * @property webSearchOptions This tool searches the web for relevant results to use in a response.
 * @property additionalProperties Arbitrary extra request fields not modeled by the properties above.
 * [OpenAIChatCompletionRequestSerializer] flattens these entries into the top-level JSON object
 * instead of serializing them as a nested map, which allows callers to pass
 * provider-specific or newly introduced API parameters without model changes.
 */
@Serializable
internal class OpenAIChatCompletionRequest(
    val messages: List<OpenAIMessage>,
    override val model: String,
    val audio: OpenAIAudioConfig? = null,
    val frequencyPenalty: Double? = null,
    val logitBias: Map<String, Int>? = null,
    val logprobs: Boolean? = null,
    val maxCompletionTokens: Int? = null,
    val maxTokens: Int? = null,
    val metadata: Map<String, String>? = null,
    val modalities: List<OpenAIModalities>? = null,
    // Serialized as the API's `n` parameter.
    @SerialName("n")
    val numberOfChoices: Int? = null,
    val parallelToolCalls: Boolean? = null,
    val prediction: OpenAIStaticContent? = null,
    val presencePenalty: Double? = null,
    val promptCacheKey: String? = null,
    val reasoningEffort: ReasoningEffort? = null,
    val responseFormat: OpenAIResponseFormat? = null,
    val safetyIdentifier: String? = null,
    val seed: Int? = null,
    val serviceTier: ServiceTier? = null,
    val stop: List<String>? = null,
    val store: Boolean? = null,
    override val stream: Boolean? = null,
    val streamOptions: OpenAIStreamOptions? = null,
    override val temperature: Double? = null,
    val toolChoice: OpenAIToolChoice? = null,
    val tools: List<OpenAITool>? = null,
    override val topLogprobs: Int? = null,
    override val topP: Double? = null,
    val user: String? = null,
    val webSearchOptions: OpenAIWebSearchOptions? = null,
    // Flattened into the top-level JSON object by OpenAIChatCompletionRequestSerializer.
    val additionalProperties: Map<String, JsonElement>? = null,
) : OpenAIBaseLLMRequest

/**
 * Chat completion choice.
 *
 * @property finishReason The reason the model stopped generating tokens.
 * This will be `stop` if the model hit a natural stop point or a provided stop sequence,
 * `length` if the maximum number of tokens specified in the request was reached,
 * `content_filter` if content was omitted due to a flag from our content filters,
 * `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.
 * Kept as a plain [String] — presumably so that values added by the API later
 * do not break deserialization; confirm before converting to an enum.
 * @property index The index of the choice in the list of choices.
 * @property logprobs Log probability information for the choice.
 * Only populated when `logprobs` was requested; `null` otherwise.
 * @property message A chat completion message generated by the model.
 *
 * See [choices](https://platform.openai.com/docs/api-reference/chat/object#chat/object-choices)
 */
@Serializable
public class OpenAIChoice(
    public val finishReason: String,
    public val index: Int,
    public val logprobs: OpenAIChoiceLogProbs? = null,
    public val message: OpenAIMessage,
)

/**
 * Represents the response from the OpenAI chat completion API.
 *
 * @property choices A list of chat completion choices. Can be more than one if `n` is greater than 1.
 * @property created The Unix timestamp (in seconds) of when the chat completion was created.
 * @property id A unique identifier for the chat completion.
 * @property model The model used for the chat completion.
 * @property objectType The object type, which is always `chat.completion`.
 * @property serviceTier Specifies the processing type used for serving the request.
 * Modeled as a plain [String] here (unlike the [ServiceTier] enum on the request) —
 * presumably to tolerate tier values the client does not know about; confirm before tightening.
 *
 * - If set to 'auto', then the request will be processed with the service tier configured in the Project settings.
 * Unless otherwise configured, the Project will use 'default'.
 * - If set to 'default',
 * then the request will be processed with the standard pricing and performance for the selected model.
 * - If set to 'flex' or 'priority', then the request will be processed with the corresponding service tier.
 * Contact sales to learn more about Priority processing.
 * - When not set, the default behavior is 'auto'.
 *
 * When the [serviceTier] parameter is set,
 * the response body will include the [serviceTier] value based on the processing
 * mode actually used to serve the request.
 * This response value may be different from the value set in the parameter.
 * @property systemFingerprint This fingerprint represents the backend configuration that the model runs with.
 *
 * Can be used in conjunction with the `seed` request parameter
 * to understand when backend changes have been made that might impact determinism.
 * @property usage Usage statistics for the completion request.
 *
 * See [The chat completion object](https://platform.openai.com/docs/api-reference/chat/object)
 */
@Serializable
public class OpenAIChatCompletionResponse(
    public val choices: List<OpenAIChoice>,
    public override val created: Long,
    public override val id: String,
    public override val model: String,
    public val serviceTier: String? = null,
    public val systemFingerprint: String? = null,
    @SerialName("object")
    public val objectType: String,
    public val usage: OpenAIUsage? = null,
) : OpenAIBaseLLMResponse

/**
 * Represents the stream response (a single chunk) from the OpenAI chat completion API.
 *
 * @property choices A list of chat completion choices.
 * Can contain more than one element if `n` is greater than 1.
 * Can also be empty for the last chunk if you set `stream_options: {"include_usage": true}`.
 * @property created The Unix timestamp (in seconds) of when the chat completion was created.
 * @property id A unique identifier for the chat completion.
 * @property model The model used for the chat completion.
 * @property objectType The object type, which is always `chat.completion.chunk`.
 * @property serviceTier Specifies the processing type used for serving the request.
 * Modeled as a plain [String] here (unlike the [ServiceTier] enum on the request) —
 * presumably to tolerate tier values the client does not know about; confirm before tightening.
 *
 * - If set to 'auto', then the request will be processed with the service tier configured in the Project settings.
 * Unless otherwise configured, the Project will use 'default'.
 * - If set to 'default',
 * then the request will be processed with the standard pricing and performance for the selected model.
 * - If set to 'flex' or 'priority', then the request will be processed with the corresponding service tier.
 * Contact sales to learn more about Priority processing.
 * - When not set, the default behavior is 'auto'.
 *
 * When the [serviceTier] parameter is set,
 * the response body will include the [serviceTier] value based on the processing
 * mode actually used to serve the request.
 * This response value may be different from the value set in the parameter.
 * @property systemFingerprint This fingerprint represents the backend configuration that the model runs with.
 *
 * Can be used in conjunction with the `seed` request parameter
 * to understand when backend changes have been made that might impact determinism.
 * @property usage Usage statistics for the completion request.
 * When `stream_options: {"include_usage": true}` is set, this is populated only on the
 * final chunk; otherwise it is `null`.
 *
 * See [The chat completion chunk object](https://platform.openai.com/docs/api-reference/chat-streaming/streaming)
 */
@Serializable
public class OpenAIChatCompletionStreamResponse(
    public val choices: List<OpenAIStreamChoice>,
    public override val created: Long,
    public override val id: String,
    public override val model: String,
    public val serviceTier: String? = null,
    public val systemFingerprint: String? = null,
    @SerialName("object")
    public val objectType: String,
    public val usage: OpenAIUsage? = null,
) : OpenAIBaseLLMStreamResponse

/**
 * JSON serializer for [OpenAIChatCompletionRequest].
 *
 * Delegates to [AdditionalPropertiesFlatteningSerializer] so that the entries of
 * [OpenAIChatCompletionRequest.additionalProperties] are written as top-level fields
 * of the serialized request object rather than as a nested `additionalProperties` map.
 */
internal object OpenAIChatCompletionRequestSerializer :
    AdditionalPropertiesFlatteningSerializer<OpenAIChatCompletionRequest>(OpenAIChatCompletionRequest.serializer())
